# =============================================================================
# APPENDIX O TABLE 16A: FALSE POSITIVE ANALYSIS (BOIX & SVOLIK)
# False positive analysis using survival models for Boix & Svolik replication
# =============================================================================

# --- PACKAGES ----------------------------------------------------------------
library(survival)
library(survminer)
library(tidyverse)
library(flexsurv)
library(matrixStats)
library(haven)
library(xtable)
library(texreg)
library(dplyr)

# --- DATA LOADING ------------------------------------------------------------
data <- read_dta("leader_tvc_2.dta", encoding = "latin1") %>%
  mutate(COWcode = ccode)
latent <- read.csv("estimates_independent.csv")

# --- DATA PREPARATION --------------------------------------------------------
# Restricted sample preparation (original models run only on new measure sample)
df_restricted <- data %>%
  semi_join(latent, by = c("COWcode", "year")) %>%
  arrange(COWcode, year, leadid) %>%
  group_by(leadid) %>%
  mutate(time0 = lag(time, default = 0)) %>%
  ungroup()

# Replication sample preparation with merged data
merged <- merge(latent, data, by = c("COWcode", "year")) %>%
  arrange(COWcode, year) %>%
  group_by(COWcode) %>%
  mutate(
    leg_growth_new = dyn.estimates * growth_1,
    dyn.estimates_lag = lag(dyn.estimates, n = 1),
    dyn.up_lag = lag(dyn.up, n = 1),
    dyn.lo_lag = lag(dyn.lo, n = 1)
  ) %>%
  ungroup()

df_new <- merged %>%
  group_by(leadid) %>%
  mutate(time0 = lag(time, default = 0)) %>%
  ungroup() %>%
  mutate(
    dyn_quartile = ntile(dyn.estimates, 4),
    growth_power = dyn.estimates_lag * chgdpen_fearonlaitin,
    quartile_label = factor(dyn_quartile, levels = 1:4,
                            labels = c("Quartile 1", "Quartile 2", "Quartile 3", "Quartile 4"))
  )

# Create false positive indicator
df_new <- df_new %>%
  mutate(
    dyn_quartile_lag = ntile(dyn.estimates_lag, 4),
    false_positive = ifelse(legislature == 1 & dyn_quartile_lag == 1, 1, 0)
  )

# --- ANALYSIS ----------------------------------------------------------------
# Create survival objects
surv_object_coup_restricted    <- Surv(df_restricted$time0, df_restricted$time, df_restricted$c_coup)
surv_object_natural_restricted <- Surv(df_restricted$time0, df_restricted$time, df_restricted$c_natural)
surv_object_revolt_restricted  <- Surv(df_restricted$time0, df_restricted$time, df_restricted$c_revolt)

surv_object_coup_new    <- Surv(df_new$time0, df_new$time, df_new$c_coup)
surv_object_natural_new <- Surv(df_new$time0, df_new$time, df_new$c_natural)
surv_object_revolt_new  <- Surv(df_new$time0, df_new$time, df_new$c_revolt)

# Fit survival models with false positive indicator
fit_coups_new <- flexsurvreg(
  surv_object_coup_new ~ false_positive + gdp_1k + chgdpen_fearonlaitin + 
    Oil_fearonlaitin + postcoldwarlag + civiliandictatorshiplag + 
    militarydictatorshiplag + communist + lpopl1_fearonlaitin + 
    ethfrac_fearonlaitin + relfrac_fearonlaitin + age,
  data = df_new, 
  dist = "weibull"
)

fit_revolt_new <- flexsurvreg(
  surv_object_revolt_new ~ false_positive + gdp_1k + chgdpen_fearonlaitin + 
    Oil_fearonlaitin + postcoldwarlag + civiliandictatorshiplag + 
    militarydictatorshiplag + communist + lpopl1_fearonlaitin + 
    ethfrac_fearonlaitin + relfrac_fearonlaitin + age,
  data = df_new, 
  dist = "weibull"
)

# --- OUTPUT ------------------------------------------------------------------
texreg(list(fit_coups_new, fit_revolt_new),
       custom.model.names = c("Coups", "Revolts"),
       caption = "False Positive Effects on Leader Exit (Survival Analysis)",
       caption.above = TRUE)
